In [ ]:


In [ ]:
import os
from collections import defaultdict
%matplotlib inline
import matplotlib.pyplot as plt

In [ ]:
import yaml
import re

def safe_yaml_read(fpath, replace_str=''):
    """
    Read a yaml file after stripping all of the jinja templating markup.

    Any ``{{ ... }}`` or ``{% ... %}`` span that opens and closes on the same
    line is replaced before the text is handed to the yaml parser.

    Parameters
    ----------
    fpath : str
        Path to yaml file to sanitize

    replace_str : str
        String to replace the template markup with, defaults to ''.
        It is inserted literally (no regex escape processing).

    Returns
    -------
    yaml_dict : dict
        The parsed document with all of the jinja2 templating fields
        replaced with ``replace_str``.  The parser decides the actual
        type; an empty document yields ``None``.

    Raises
    ------
    yaml.YAMLError
        If the sanitized text is not valid yaml.
    """
    with open(fpath, 'r') as f:
        # A callable replacement keeps backslashes in ``replace_str`` literal.
        sanitized = ''.join(
            re.sub(r'{[{%].*?[%}]}', lambda _match: replace_str, ln)
            for ln in f
        )
    # NOTE(review): the original called ``yaml.load`` without a Loader, which
    # is rejected by PyYAML >= 6 and can construct arbitrary Python objects on
    # older releases.  Recipe metadata is plain data, so the safe loader is
    # used; files relying on python-specific yaml tags will now be refused.
    return yaml.safe_load(sanitized)

In [ ]:
# Root of the local conda-prescriptions recipe checkout, located under the
# current user's home directory.
home_dir = os.path.expanduser('~')
recipes_path = os.path.join(
    home_dir, 'dev', 'conda', 'conda-prescriptions', 'recipes')

In [ ]:
from yaml import YAMLError
from yaml.parser import ParserError  # kept: other cells may still reference it

# Maps library name -> {version folder name -> parsed meta.yaml, or None when
# the file could not be parsed}.
all_recipes = defaultdict(dict)
for parent_folder, child_folders, files in os.walk(recipes_path):
    if 'meta.yaml' not in files:
        continue
    # The two innermost folder names are taken as <lib_name>/<version>.
    split_path = parent_folder.split(os.sep)
    lib_name = split_path[-2]
    version = split_path[-1]
    try:
        yaml_info = safe_yaml_read(os.path.join(parent_folder, 'meta.yaml'))
    except YAMLError as err:
        # YAMLError is the base class of ParserError, so scanner and other
        # yaml failures are reported here too instead of aborting the walk.
        print('%s::%s not parseable' % (lib_name, version))
        print('%s --> %s' % (type(err).__name__, err))
        yaml_info = None
    all_recipes[lib_name][version] = yaml_info

# remove python. we need to special case that one...
# A default avoids a KeyError when the checkout has no python recipe.
python_versions = all_recipes.pop('python', {})

In [ ]:
# Show each library next to the sorted list of version folders found for it.
for lib_name, versions in sorted(all_recipes.items()):
    version_names = sorted(versions)
    print(lib_name, version_names)

Looks good! Next, separate these into `latest_tagged` and `dev_only`.


In [ ]:
def _version_sort_key(version):
    """Sort key that compares the digit runs of a version string numerically.

    Plain string sorting puts '0.10' before '0.9'; this key orders them by
    the integer value of each run of digits instead.
    """
    key = []
    # re.split with a capturing group alternates non-digit / digit chunks,
    # so the odd positions are always the digit runs.
    for idx, chunk in enumerate(re.split(r'(\d+)', version)):
        if idx % 2:
            key.append((0, int(chunk)))
        elif chunk:
            key.append((1, chunk))
    return key


# Maps library name -> {chosen version: parsed meta.yaml}.  The chosen version
# is the highest-sorting non-'dev' one; 'dev' is used only when it is the sole
# version available.
latest_tagged = defaultdict(dict)
for lib_name, all_versions in all_recipes.items():
    versions = sorted(all_versions, key=_version_sort_key)
    if len(versions) > 1 and 'dev' in versions:
        versions.remove('dev')
    version = versions[-1]
    latest_tagged[lib_name][version] = all_versions[version]

In [ ]:
# Show which version was selected for each library.
for lib_name, chosen in sorted(latest_tagged.items()):
    version_names = sorted(chosen)
    print(lib_name, version_names)

In [ ]:
# Maps library name -> parsed meta.yaml of its 'dev' recipe, for every library
# that has one.
dev_only = defaultdict(
    dict,
    ((name, versions['dev'])
     for name, versions in all_recipes.items()
     if 'dev' in versions),
)

In [ ]:
# Names of the libraries that ship a 'dev' recipe, in sorted order.
dev_lib_names = sorted(dev_only)
print(dev_lib_names)

In [ ]:
import networkx as nx

In [ ]:
def add_requirements(graph, requirements_list, target_lib):
    """
    Add ``target_lib`` and its requirements to ``graph``.

    ``target_lib`` is always added as a node.  Each requirement is added as a
    node with an edge pointing from the requirement to ``target_lib``.

    Parameters
    ----------
    graph : graph object
        Anything exposing ``add_node(node)`` and ``add_edge(u, v)``; the
        notebook passes ``networkx.DiGraph`` instances.

    requirements_list : iterable of str, str or None
        The requirements of ``target_lib``.  ``None`` (what an empty yaml
        key parses to) means no requirements, and a single string is treated
        as one requirement rather than iterated character by character.

    target_lib : str
        The library whose requirements are being recorded.
    """
    graph.add_node(target_lib)
    if requirements_list is None:
        return
    if isinstance(requirements_list, str):
        requirements_list = [requirements_list]
    for req in requirements_list:
        graph.add_node(req)
        graph.add_edge(req, target_lib)

In [ ]:
# One row of axes per dev recipe: build requirements on the left, run
# requirements on the right.  The two ``all_*`` graphs accumulate the
# requirements of every dev recipe.
n_rows = max(len(dev_only), 1)  # subplots() rejects zero rows
# squeeze=False keeps ``ax`` two-dimensional even when there is a single row,
# so ``ax[row]`` is always a (build, run) pair of axes.
fig, ax = plt.subplots(ncols=2, nrows=n_rows, figsize=(10, 4 * n_rows),
                       squeeze=False)
all_runs_dev_only = nx.DiGraph()
all_builds_dev_only = nx.DiGraph()
for row, (lib, meta) in enumerate(sorted(dev_only.items())):
    run = nx.DiGraph()
    build = nx.DiGraph()
    # Tolerate unparseable recipes (stored as None) and recipes without a
    # requirements / build / run section instead of raising.
    reqs = (meta or {}).get('requirements') or {}
    build_reqs = reqs.get('build') or []
    run_reqs = reqs.get('run') or []
    add_requirements(build, build_reqs, lib)
    add_requirements(run, run_reqs, lib)
    add_requirements(all_builds_dev_only, build_reqs, lib)
    add_requirements(all_runs_dev_only, run_reqs, lib)
    build_ax, run_ax = ax[row]
    nx.draw_networkx(build, ax=build_ax)
    nx.draw_networkx(run, ax=run_ax)
    build_ax.set_title("%s Build requirements" % lib)
    run_ax.set_title("%s Run requirements" % lib)

In [ ]:
# Check that the combined run-requirement graph of the dev recipes has no cycles.
nx.is_directed_acyclic_graph(all_runs_dev_only)

In [ ]:
# Check that the combined build-requirement graph of the dev recipes has no cycles.
nx.is_directed_acyclic_graph(all_builds_dev_only)

Do the same for `latest_tagged` (the newest tagged version of each library, or `dev` when that is the only version available).


In [ ]:
# One row of axes per library: build requirements on the left, run
# requirements on the right.  The two ``all_*`` graphs accumulate the
# requirements of every selected recipe.
n_rows = max(len(latest_tagged), 1)  # subplots() rejects zero rows
# squeeze=False keeps ``ax`` two-dimensional even when there is a single row.
fig, ax = plt.subplots(ncols=2, nrows=n_rows, figsize=(10, 4 * n_rows),
                       squeeze=False)
all_runs_latest_tagged = nx.DiGraph()
all_builds_latest_tagged = nx.DiGraph()
for row, (lib, version) in enumerate(sorted(latest_tagged.items())):
    # Take the first (version -> meta) entry stored for this library.
    meta = list(version.values())[0]
    run = nx.DiGraph()
    build = nx.DiGraph()
    build_ax, run_ax = ax[row]
    build_ax.set_title("%s Build requirements" % lib)
    run_ax.set_title("%s Run requirements" % lib)
    # ``meta`` is None for recipes that failed to parse (or were empty);
    # treat that the same as a recipe without a requirements section.
    reqs = (meta or {}).get('requirements') or {}
    build_reqs = reqs.get('build')
    run_reqs = reqs.get('run')
    if build_reqs:
        add_requirements(build, build_reqs, lib)
        add_requirements(all_builds_latest_tagged, build_reqs, lib)
        nx.draw_networkx(build, ax=build_ax)

    if run_reqs:
        add_requirements(run, run_reqs, lib)
        add_requirements(all_runs_latest_tagged, run_reqs, lib)
        nx.draw_networkx(run, ax=run_ax)

In [ ]:
# Check that the combined build-requirement graph of the latest tagged recipes has no cycles.
nx.is_directed_acyclic_graph(all_builds_latest_tagged)

In [ ]:
# Check that the combined run-requirement graph of the latest tagged recipes has no cycles.
nx.is_directed_acyclic_graph(all_runs_latest_tagged)

In [ ]:
# Draw the combined build-requirement graph of all dev recipes on a single,
# deliberately oversized figure so the node labels have room.
fig, ax = plt.subplots(figsize=(50,50))
nx.draw_networkx(all_builds_dev_only, ax=ax)
ax.set_title("All build requirements, dev recipes only")

In [ ]:
# Draw the combined run-requirement graph of all dev recipes on a single,
# deliberately oversized figure so the node labels have room.
fig, ax = plt.subplots(figsize=(50,50))
nx.draw_networkx(all_runs_dev_only, ax=ax)
ax.set_title("All runtime requirements, dev recipes only")

In [ ]:
# Draw the combined build-requirement graph of the latest tagged recipes on a
# single, deliberately oversized figure so the node labels have room.
fig, ax = plt.subplots(figsize=(50,50))
nx.draw_networkx(all_builds_latest_tagged, ax=ax)
ax.set_title("All build requirements, latest tagged recipes")

In [ ]:
# Draw the combined run-requirement graph of the latest tagged recipes on a
# single, deliberately oversized figure so the node labels have room.
fig, ax = plt.subplots(figsize=(50,50))
nx.draw_networkx(all_runs_latest_tagged, ax=ax)
ax.set_title("All runtime requirements, latest tagged recipes")

In [ ]:
# List every node (library or requirement) in the latest-tagged run graph, sorted.
sorted(all_runs_latest_tagged.nodes())

In [ ]:
# NOTE(review): leftover interactive introspection -- the IPython ``??`` suffix
# shows the source of ``subgraph``.  It is not plain Python and produces no
# analysis result; delete this cell before sharing the notebook.
all_runs_latest_tagged.subgraph??

In [ ]:
# Extract 'dataportal' together with everything it requires at run time,
# directly or transitively.  Edges point from requirement to dependent, so
# those requirements are the node's ancestors.  Passing the bare string to
# ``subgraph`` selects at most that single node, which leaves a graph with
# no edges to draw.
target = 'dataportal'
if target in all_runs_latest_tagged:
    keep = nx.ancestors(all_runs_latest_tagged, target) | {target}
else:
    # Unknown library: fall back to an empty selection instead of raising.
    keep = set()
g = all_runs_latest_tagged.subgraph(keep)

In [ ]:
# Draw the subgraph ``g``; no ``ax`` is passed, so networkx chooses the axes.
nx.draw_networkx(g)

In [ ]:
# Show every edge of the latest-tagged run graph; each edge was added as
# (requirement, library that requires it).
all_runs_latest_tagged.edges()

In [ ]:


In [ ]:
# Look up the adjacency entry of the 'clint' node.
# NOTE(review): for a DiGraph this should be the outgoing edges, i.e. the
# libraries that list clint as a run requirement -- confirm.
all_runs_latest_tagged['clint']

In [ ]: